#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# Mar 24 2024
# Replication script for *analysis* in:
# Electoral Systems And The Autocrat’s Trade-Off: Evidence From The Russian Duma
# World Politics, vol. 76, no. 4, Oct 2024
# Before running: organize data in "data" subfolder, make subfolder "tables" and make subfolder "Figures". 
#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

rm(list = ls(all = TRUE))
setwd(dirname(rstudioapi::getSourceEditorContext()$path)) # 


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### READING IN PACKAGES
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pack <- c("Cairo", 'parallel', 'jsonlite', 'httr', 'caret', 'rdd', 'rdrobust', 'xlsx', 'Rfast',"readxl", "sqldf", "lubridate", "quanteda","rvest", "randomForest", "SnowballC", "tidytext", "topicmodels", "RTextTools", "tm", "zoo", "gdata", "readr", "purrr","stringdist" , "stringi", "data.table", "jsonlite", "XML", "httr", "ggrepel", "classInt", "MASS", "tile", "simcf","sp" ,"plyr", "dplyr", "stringr", "ggplot2", "lfe", "stargazer")
lapply(pack, require, character.only=T); rm(pack)


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### READING IN DATA
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# SPEECHES
utterance_dta <- fread("data/speeches.csv", encoding = 'UTF-8', na.strings = c('NA', ''))

# ROLL-CALL VOTES
votes1 = fread("data/votes1.csv", na.strings = c('', 'NA'))
votes2 = fread("data/votes2.csv", na.strings = c('', 'NA'))
voting_w_MV = rbind(votes1, votes2)
voting_w_MV[, resistance := ifelse(result=='for', 0, 1)]
voting_w_MV[, resistance_conv := sum(resistance) / .N, by='treat_assign']
voting_w_MV[, absenteeism := ifelse(result=='absent', 1, 0)]
voting_w_MV[, absence_conv := sum(absenteeism) / .N, by='treat_assign']

# TRAVEL
sumFlights <- fread('data/sumFlights.csv')

# VARIETIES OF DEMOCRACY DATASET (INTRODUCTION)
Vdem <- fread('data/V-Dem-CY-Full+Others-v13.csv')

# https://www.v-dem.net/media/filer_public/e0/7f/e07f672b-b91e-4e98-b9a3-78f8cd4de696/v-dem_codebook_v8.pdf
Vdem[, v2elloelsy := na.locf(v2elloelsy, na.rm = FALSE), by='country_text_id'] # assign electoral system as most recent electoral system (still with "elections==1" as condition)

numeric <- 0:12
label <- c('First-past-the-post',  'Two-round system in single-member constituencies', 'Alternative vote in single-member districts', 
           'Block vote in multi-member districts', 'Party block vote in multi-member districts', 'Parallel', 
           'Mixed-member proportional', 'List PR with small multi-member districts', 'List PR with large multi-member districts', 
           'Single-transferable vote in multi-member districts', 'Single non-transferable vote in multi-member districts', 'Limited vote in multi-member districts', 
           'Borda Count in single- or multi-member districts')

for (i in 1:length(numeric)){
  Vdem[v2elloelsy==i, elec_system := label[i]]}
summary(as.factor(Vdem$elec_system))
Vdem_auto <- Vdem[!is.na(v2x_regime) & !is.na(elec_system) & year>1990, 
                  .SD, .SDcols=c('country_name', 'year', 'v2x_regime', 'elec_system', 'v2elloelsy', 'v2xlg_elecreg')] # 0 = closed autocracy, 1 = electoral autocracy
Vdem_auto[, system_change := uniqueN(elec_system), by=c('country_name', 'v2x_regime')]
Vdem_auto[, group := ifelse(v2x_regime<2, 'Autocracy', 'Democracy')]


# SMD CANDIDATES
SMD <- setDT(read_csv('data/SMD_candidates.csv'))

# *** DEFINING CONVOCATION ***
interval <- as.numeric(as.Date(c("1993-01-01", "1995-12-16", "1999-12-19", "2003-12-07", "2007-12-24", "2011-12-21", "2016-10-05", "2020-12-30")))

# *** DEFINING BANDWIDTHS AND BANDWIDTH LABELS ***
bw = c(.35, .25, .15)
bw.label <- paste(' $\\\\Delta\\\\mu$  \\&  ', paste(bw, sep='', collapse = ' \\& '), sep='')

# *** DEFINING SAMPLE LABELS ***
table.name <- c('Sample 1', 'Sample 2')



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### NUMBERS USED IN TEXT
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# - - - - - - - - - - - - - - - -  - - - - - - - -  
# ELECTORAL AUTOCRACY OUT OF ALL AUTOCRACIES
# - - - - - - - - - - - - - - - -  - - - - - - - -  
Vdem[year==2019 & v2x_regime %in% 0:1 & v2xlg_elecreg==1 , .N] / Vdem[year==2019 & v2x_regime %in% 0:1 , .N] # 0.8735632


# - - - - - - - - - - - - - - - -  - - - - - - - -  
# CHANGES IN ELECTORAL SYSTEM SINCE 1990
# - - - - - - - - - - - - - - - -  - - - - - - - -  
Vdem_auto[v2x_regime %in% 0:1 & system_change>1, uniqueN(country_name)] / Vdem_auto[v2x_regime %in% 0:1, uniqueN(country_name)] # 0.3157895 / 29 countries

# - - - - - - - - - - - - - - - -  - - - - - - - -  
# REDUCTION IN SMD SINCE 1990
# - - - - - - - - - - - - - - - -  - - - - - - - -  
development <- Vdem_auto[v2xlg_elecreg==1, list(sum(grepl('(single-member|Parallel|First-past)', elec_system)), .N), by=c('year', 'group')]
development[, share := V1 / N]
development[year %in% c(1991, 2022), .SD, .SDcols=c(1,2,5)] %>% arrange(group, year)

# - - - - - - - - - - - - - - - -  - - - - - - - -  
# SEAT SHARE WON BY LARGEST PARTY
# - - - - - - - - - - - - - - - -  - - - - - - - -  
# LARGEST PARTY
Vdem[year==2022 & v2x_regime %in% 1 & v2xlg_elecreg==1 , mean(v2ellostsl, na.rm=T)] # 51.186

# - - - - - - - - - - - - - - - -  - - - - - - - -  
# OPPOSITION AUTONOMY
# - - - - - - - - - - - - - - - -  - - - - - - - -  
overview <- Vdem[year==2022 & v2x_regime %in% 0:1 & v2xlg_elecreg==1 & v2psoppaut_ord!=0, summary(as.factor(v2psoppaut_ord))] / 
  Vdem[year==2022 & v2x_regime %in% 0:1 & v2xlg_elecreg==1 & v2psoppaut_ord!=0, .N]
sum(overview[1:2]) # 0.3538462
sum(overview[4]) # 0.1076923

# - - - - - - - - - - - - - - - -  - - - - - - - -  
# TYPES OF OBSTRUCTION
# - - - - - - - - - - - - - - - -  - - - - - - - -  
voting_w_MV[result!='for', round(summary(as.factor(result))/.N, 3)]
# absent abstain against 
# 0.947   0.003   0.050



# - - - - - - - - - - - - - - - -  - - - - - - - -  
# SHARE OF LOCAL DISTRIBUTIVE PREFERENCE
# - - - - - - - - - - - - - - - -  - - - - - - - -
mean(utterance_dta$LocDistPref, na.rm=T) # 0.03584483
mean(utterance_dta$home_ref, na.rm=T)   # 0.2669598

# - - - - - - - - - - - - - - - -  - - - - - - - -  
# INCUMBENCY ADVANTAGE
# - - - - - - - - - - - - - - - -  - - - - - - - - 
dta <- utterance_dta[dual_candidate==T & !duplicated(paste(CHAR_ID, convocation))]
setorder(dta, 'convocation')
dta$list_next <- NA
dta[duplicated(CHAR_ID), .N]
for (i in 1:nrow(dta)){
  match <- utterance_dta[!duplicated(paste(CHAR_ID, convocation)) & convocation == dta$convocation[i]+1 & CHAR_ID == dta$CHAR_ID[i]][['list']]
  if(length(match)>0){
    dta$list_next[i] <- match }else{
      dta$list_next[i] <- NA}}
dta[list_next == '', list_next := NA]
dta[!is.na(list_next) & list==list_next, .N]  / dta[!is.na(list_next), .N] # .76  some majoritarian turning PR after electoral reform, or from PR to majoritarian when mixed system reintroduced



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# FIGURE 1 - Deputies and list affiliation in the Russian Duma
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

convocation <- c('1993\u201295', '1995\u201299', '1999\u20122003', '2003\u201207', '2007\u201211', '2011\u201216', '2016\u201220')
smd <- c(225, 225, 225, 225, 0, 0, 225)
PR <- c(225, 225, 225, 225, 450, 450, 225)

dual_candidates <- utterance_dta[ dual_candidate==T] %>% group_by(list, convocation) %>% summarize(sum =uniqueN(treat_assign))
dual_candidatesTot <- setDT(utterance_dta[ dual_candidate==T] %>% group_by(convocation) %>% summarize(sum =uniqueN(treat_assign)))
dual_candidatesTot[, list:='Total dual-included']
dual_candidatesTot <- dual_candidatesTot[, .SD, .SDcols=c('list', 'convocation', 'sum')]
dual_candidates <- rbindlist(list(dual_candidatesTot, dual_candidates))
names(dual_candidates) <- c('List', 'Convocation', 'Sum')
dual_candidates[List == "PR", List := "Federal party-list"]
dual_candidates[List == "majoritarian", List := "District list"]

conv.dta <- as.data.frame(cbind(rep(convocation, 2), c(smd, PR), c(rep('District list', 7), rep('Federal party-list', 7))), stringsAsFactors = F)
conv.dta$V2 <- as.numeric(conv.dta$V2)
breaks.y <- c(225, seq(0, 450, 50))

text <- data.frame(x = 5.5, y = 490, label = "Pure PR system in place")

plot.dual <- ggplot(conv.dta, aes(x = V1, y = V2)) + xlab('') + ylab('Number of Deputies') + theme_bw() +
  geom_col(aes(fill = V3), col='black', width = 0.8, alpha=.7) + scale_y_continuous(limits = c(0,500), breaks = breaks.y) + 
  geom_segment(inherit.aes = F, aes(x=4.5, y=475, xend=6.5, yend=475))+
  geom_segment(inherit.aes = F, aes(x=4.5, y=475, xend=4.5, yend=460))+
  geom_segment(inherit.aes = F, aes(x=6.5, y=475, xend=6.5, yend=460))+
  geom_text(data=text, aes(x = x, y = y, label =label), size = 5, family = "Times New Roman")+
  geom_bar(data=dual_candidates, width = 0.5, inherit.aes = FALSE, position="dodge", 
           stat = 'identity', aes(x=Convocation, y=Sum, fill=List),  col='black')+ 
  theme(legend.position="bottom", legend.title=element_blank(), axis.text.x= element_text(size=12), legend.text = element_text(size=18),
        axis.text.y= element_text(size=14), axis.title.x = element_text(size=18),  axis.title.y = element_text(size=18), 
        legend.spacing.x = unit(.5, 'cm'), text = element_text(family = "Times New Roman"))+
  scale_fill_manual(breaks = c("Federal party-list", "District list", 'Total dual-included'),
                    values = c("cornsilk4", "antiquewhite", "gray30")) 
plot.dual

ggsave(file="Figures/Figure1.tiff", plot.dual, width = 8, height = 6, device = "tiff", dpi = 100)


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table 2 - Local representation ML measure
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

datasets <- list()
datasets[[1]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5]; datasets[[1]][, home_ref_conv := LocDistPref_convocation] # year > 1999 &
datasets[[2]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5]; datasets[[2]][, home_ref_conv := LocDistPref_convocation] #year > 1999 &
table.name.expand <- c('Sample 1', 'Sample 2')

for (D in 1:length(datasets)){
  print(rdbwselect(y=datasets[[D]][['home_ref_conv']], x=datasets[[D]][['SMD_margin_victory']],  
                   c = 0, bwselect = "mserd")$bws[1])}
# [1] 0.2024985
# [1] 0.1938515

stargazer.table <- Diff <- list()
for (D in 1:length(datasets)){
  models <- list()
  sd <- mean <- NULL
  for (i in 1:length(bw)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw[i]][['home_ref_conv']], na.rm=T))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw[i] & list_numeric == 0][['home_ref_conv']], na.rm=T))
    
    datasets[[D]][, X := I(SMD_margin_victory>0)]
    Diff[[i]] <- felm(home_ref_conv ~  X  + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]] )
    models[[i]] <- felm(home_ref_conv ~  X*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw[i]]  )}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|seniority|votemargin|urban|Constant|trips|age|gender|PRFedReg_num|United_Russia|seniority)', covariate.labels = c('$SMD$'),
                                                   digits = 2, title='Local information I: ML outcome', digits.extra = 1, dep.var.labels = table.name.expand[D],
                                                   
                                                   notes.append = F, notes.align='l', column.sep.width = "-2pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('\\\\[-1.5ex] \\hline \\\\[-1.5ex] $\\sigma_y$', paste(round(c(sd[1], sd), 2))), 
                                                                    c('$\\bar{y}_0$', paste(round(c(mean[1], mean), 2)))),
                                                   label = 'fig:content1', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\)', paste('Bandwidth &', bw.label, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{4.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{5.*', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
}
stargazer.table <- gsub('\\multicolumn{5}{l}{$^{*}$p $<$ .1; $^{**}$p $<$ .05; $^{***}$p $<$ .01}', '', stargazer.table, fixed=T)
notes = '$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. From second column all models of the form $R_{ic}= \\beta_1 SMD_{ic} + \\beta_2 MV_{ic} + \\beta_3 SMD_{ic} \\times MV_{ic} +\\varepsilon$, but only the first term is reported. Models control for age, gender, seniority, regional or nationwide PR mandate, and United Russia affilation. Column 1 is the difference in means for $bw=1$ including controls. Dependent variable is the share of speeches containing local reference according to the machine learning classifier. Sample 1 includes only dual included deputies, Sample 2 additionally includes all majoritarian candidates. Std.errors clustered at the deputy-convocation level, and convocation fe. For further information on the machine learning measure, see Section \\ref{app:ML}. Optimal bandwidths \\citep{calonico2014robust} from top to bottom are: 0.20 and 0.19'

write(gsub('\\[-4ex\\]', '[-5ex\\]', gsub('\\[.5ex\\]', '[-2ex\\]',  gsub('\\[-1.5ex\\]', '[-2.5ex\\]', gsub('\\[-3ex\\]', '[-4ex\\]', 
                                                                                                             paste(gsub(stargazer.table, pattern='\\end{tabular}',
                                                                                                                        replacement=paste('\\end{tabular}', '\n',
                                                                                                                                          '\\centerline{\\begin{minipage}{0.95\\textwidth}~\\', '\n',
                                                                                                                                          '\\footnotesize{' , notes, 
                                                                                                                                          '} \\end{minipage}}',  sep=''), fixed=T), sep='', collapse = ''))))), file='tables/T2.tex')

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table 3 - Local representation dictionary measure
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

datasets <- list()
datasets[[1]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5] # year > 1999 &
datasets[[2]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5] # year > 1999 &
table.name.expand <- c('Sample 1', 'Sample 2')

for (D in 1:length(datasets)){
  print(rdbwselect(y=datasets[[D]][['home_ref_conv']], x=datasets[[D]][['SMD_margin_victory']],  
                   c = 0, bwselect = "mserd")$bws[1])}
# [1] 0.1452994
# [1] 0.1501651

stargazer.table <- Diff <- list()
for (D in 1:length(datasets)){
  models <- list()
  sd <- mean <- NULL
  for (i in 1:length(bw)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw[i]][['home_ref_conv']], na.rm=T))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw[i] & list_numeric == 0][['home_ref_conv']], na.rm=T))
    
    datasets[[D]][, X := I(SMD_margin_victory>0)]
    Diff[[i]] <- felm(home_ref_conv ~  X  + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]] )
    models[[i]] <- felm(home_ref_conv ~  X*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw[i]]  )}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|seniority|votemargin|urban|Constant|trips|age|gender|PRFedReg_num|United_Russia|seniority)', covariate.labels = c('$SMD$'),
                                                   digits = 2, title='Local information II: dictionary measure', digits.extra = 1, dep.var.labels = table.name.expand[D],
                                                   
                                                   notes.append = F, notes.align='l', column.sep.width = "-2pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('\\\\[-1.5ex] \\hline \\\\[-1.5ex] $\\sigma_y$', paste(round(c(sd[1], sd), 2))), 
                                                                    c('$\\bar{y}_0$', paste(round(c(mean[1], mean), 2)))),
                                                   label = 'fig:content2', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\)', paste('Bandwidth &', bw.label, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{4.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{5.*', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
}
stargazer.table <- gsub('\\multicolumn{5}{l}{$^{*}$p $<$ .1; $^{**}$p $<$ .05; $^{***}$p $<$ .01}', '', stargazer.table, fixed=T)
notes = '$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. From second column all models of the form $R_{ic}= \\beta_1 SMD_{ic} + \\beta_2 MV_{ic} + \\beta_3 SMD_{ic} \\times MV_{ic} +\\varepsilon$, but only the first term is reported. Models control for age, gender, seniority, regional or nationwide PR mandate, and United Russia affilation. Column 1 is the difference in means for $bw=1$ including controls. Dependent variable is the average number of local references in text (dictionary measure). Sample 1 includes only dual included deputies, Sample 2 additionally includes all majoritarian candidates. Std.errors clustered at the deputy-convocation level, and convocation fe. Optimal bandwidths \\citep{calonico2014robust} from top to bottom are: 0.15 and 0.15'

write(gsub('\\[-4ex\\]', '[-5ex\\]', gsub('\\[.5ex\\]', '[-2ex\\]',  gsub('\\[-1.5ex\\]', '[-2.5ex\\]', gsub('\\[-3ex\\]', '[-4ex\\]', 
                                                                                                             paste(gsub(stargazer.table, pattern='\\end{tabular}',
                                                                                                                        replacement=paste('\\end{tabular}', '\n',
                                                                                                                                          '\\centerline{\\begin{minipage}{0.95\\textwidth}~\\', '\n',
                                                                                                                                          '\\footnotesize{' , notes, 
                                                                                                                                          '} \\end{minipage}}',  sep=''), fixed=T), sep='', collapse = ''))))), file='tables/T3.tex')

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table 4 - Voting Obstruction
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

datasets <- list()
datasets[[1]] <- voting_w_MV[ !duplicated(treat_assign) & dual_candidate==T & convocation<5] #  year > 1999 & 
datasets[[2]] <- voting_w_MV[ !duplicated(treat_assign) & (dual_candidate==T | !is.na(SMD_margin_victory)) & convocation<5] #  year > 1999 & 

for (D in 1:length(datasets)){
  print(rdbwselect(y=datasets[[D]][['resistance_conv']], x=datasets[[D]][['SMD_margin_victory']],  
                   c = 0, bwselect = "mserd")$bws[1])}
# [1] 0.144458
# [1] 0.1964151

stargazer.table <- list(); Diff <- list()
for (D in 1:length(datasets)){
  models <- list()
  sd <- NULL; mean <- NULL
  for (i in 1:length(bw)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw[i]][['resistance_conv']], na.rm=T))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw[i] & list_numeric == 0][['resistance_conv']], na.rm=T))
    
    datasets[[D]][, X := I(SMD_margin_victory>0)]
    Diff[[i]] <- felm(resistance_conv ~  X +  + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | 0 | 0 | treat_assign, data = datasets[[D]] )
    models[[i]] <- felm(resistance_conv ~  X + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw[i]]  )}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|Constant|trips|age|gender|PRFedReg_num|United_Russia|seniority)', covariate.labels = c('$SMD$'),
                                                   digits = 2, title='Legislative friction', digits.extra = 3, dep.var.labels = table.name[D],
                                                   notes.append = F, notes.align='l', column.sep.width = "0pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('\\\\[-1.5ex] \\hline \\\\[-1.5ex] $\\sigma_y$', paste(round(c(sd[1], sd), 2))), 
                                                                    c('$\\bar{y}_0$', paste(round(c(mean[1], mean), 2)))),
                                                   label = 'fig:absence', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\)', paste('Bandwidth &', bw.label, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{4.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{5.*', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
}
stargazer.table <- gsub('\\multicolumn{5}{l}{$^{*}$p $<$ .1; $^{**}$p $<$ .05; $^{***}$p $<$ .01}', '', stargazer.table, fixed=T)
notes = '$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. From second column all models of the form $R_{ic}= \\beta_1 SMD_{ic} + \\beta_2 MV_{ic} + \\beta_3 SMD_{ic} \\times MV_{ic} +\\varepsilon$, but only the first term is reported. Models control for age, gender, seniority, regional or nationwide PR mandate, and United Russia affilation. Column 1 is the difference in means for $bw=1$ including controls. Dependent variable is the share of voting absence out of all votes across the whole convocation. Sample 1 includes only dual included deputies, Sample 2 additionally includes all majoritarian candidates. Std.errors clustered at the deputy-convocation level. Optimal bandwidths \\citep{calonico2014robust} from top to bottom are: 0.14 and 0.20.'

write(gsub('\\[-4ex\\]', '[-5ex\\]', gsub('\\[.5ex\\]', '[-2ex\\]',  gsub('\\[-1.5ex\\]', '[-2.5ex\\]', gsub('\\[-3ex\\]', '[-4ex\\]', 
                                                                                                             paste(gsub(stargazer.table, pattern='\\end{tabular}',
                                                                                                                        replacement=paste('\\end{tabular}', '\n',
                                                                                                                                          '\\centerline{\\begin{minipage}{0.95\\textwidth}~\\', '\n',
                                                                                                                                          '\\footnotesize{' , notes, 
                                                                                                                                          '} \\end{minipage}}',  sep=''), fixed=T), sep='', collapse = ''))))), file='tables/T4.tex')


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Figure 3 - REGRESSION-DISCONTINUITY
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

dta <- utterance_dta[, c('y', 'x', 'covocation') := list(LocDistPref_convocation, SMD_margin_victory, convocation)]
dta <- dta[!duplicated(treat_assign) & !is.na(SMD_margin_victory) & convocation<5, .SD, .SDcols=c('y', 'x', 'convocation')]
dta[, y := y / sd(y, na.rm=T)]
dta <- dta[abs(x) <= .15] # optimal bw
dta$bins <- cut(dta$x, 100)
dta[, BinMean := mean(as.numeric(y), na.rm=T), by='bins']
dta[, MeanX := mean(as.numeric(x), na.rm=T), by='bins']
dta[, Nobs := sum(.N), by='bins']
dta <- dta[!duplicated(bins)]
dta[, treat := x>0]
sum(dta$Nobs)
rdd_plot  <-   ggplot(data=dta, aes(x=MeanX, y=BinMean, size=Nobs, group=treat)) + theme_bw() + 
  geom_point(alpha=.1) +
  geom_vline(xintercept = 0, lty='dashed') + coord_cartesian(ylim=c(0,3.5)) +
  xlab('District Margin of Victory') + ylab('Local Information\n(standardized)') + ggtitle("Panel A") +
  theme(text = element_text(size = 20, family = "Times New Roman"), legend.key.height = unit(1.8, "line"), legend.position = 'bottom', 
        legend.title=element_text(size=20), legend.text=element_text(size=20)) + 
  geom_smooth(method='lm', se=T, show.legend = F, color='black')
rdd_plot
ggsave(file="Figures/Figure3a.tiff", rdd_plot, width = 8, height = 6, device = "tiff", dpi = 100)


dta <- voting_w_MV[, c('y', 'x') := list(resistance_conv, SMD_margin_victory)]
dta <- dta[!duplicated(treat_assign) & !is.na(SMD_margin_victory) & convocation<5, .SD, .SDcols=c('y', 'x')]
dta[, y := y / sd(y, na.rm=T)]
dta <- dta[abs(x) <= .20]
dta$bins <- cut(dta$x, 100)
dta[, BinMean := mean(as.numeric(y), na.rm=T), by='bins']
dta[, MeanX := mean(as.numeric(x), na.rm=T), by='bins']
dta[, Nobs := sum(.N), by='bins']
dta <- dta[!duplicated(bins)]
dta[, treat := x>0]
sum(dta$Nobs)

rdd_plot2  <-   ggplot(data=dta, aes(x=MeanX, y=BinMean, size=Nobs, group=treat)) + theme_bw() + 
  geom_point(alpha=.1) +
  geom_vline(xintercept = 0, lty='dashed') + scale_y_continuous(limits=c(0,3.5))+
  xlab('District Margin of Victory') + ylab('Legislative Friction\n(standardized)') + ggtitle("Panel B") + 
  theme(text = element_text(size = 20, family = "Times New Roman"), legend.key.height = unit(1.8, "line"), legend.position = 'bottom', 
        legend.title=element_text(size=20), legend.text=element_text(size=20)) + 
  geom_smooth(method='lm', se=T, show.legend = F, color='black')
rdd_plot2
ggsave(file="Figures/Figure3b.tiff", rdd_plot2, width = 8, height = 6, device = "tiff", dpi = 100)



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### Figure 4 - Assessing the elite mechanism
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
sd(utterance_dta[!duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian')][['home_ref_conv']], na.rm=T) # 0.7734096
sd(utterance_dta[!duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian')][['LocDistPref_convocation']], na.rm=T) # 0.09077114
sd(voting_w_MV[!duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian')][['resistance_conv']], na.rm=T) # 0.2184302

datasets <- list()
utterance_dta[!duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian'), stdrd_home_ref_conv := home_ref_conv / sd(home_ref_conv, na.rm=T)]
voting_w_MV[!duplicated(treat_assign)  & (dual_candidate==T | list=='majoritarian'), stdrd_resistance_conv := resistance_conv / sd(resistance_conv, na.rm=T)]
utterance_dta[!duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian'), stdrd_LocDistPref_convocation := LocDistPref_convocation / sd(LocDistPref_convocation, na.rm=T)]

datasets[[1]] <- utterance_dta[convocation < 5 & !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') ]
datasets[[2]] <- voting_w_MV[convocation < 5 & !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') ]
datasets[[3]] <- utterance_dta[ convocation >= 7 & !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') ]
datasets[[4]] <- voting_w_MV[ convocation >= 7 & !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') ]
bw.plot <- c(.35, 0.25, 0.15)

mean <- obs <- obs2 <- N <- N2 <- sd <-  NULL 
models1 <- models2 <- models3 <- models4 <- models5 <- models6 <- list()
control1 <- control2 <- control3 <- control4 <- control5 <- control6 <- list()
treat1 <- treat2 <- treat3 <- treat4 <- treat5 <- treat6 <- list()
cutoff_mean <- function(model){
  dta <- setDT(data.frame(run=model$X[, colnames(model$X) %in% 'SMD_margin_victory'], outcome=model$fitted.values[,1]))
  dta[, cutt_off := ifelse(abs(run)<0.05, 1, 0)]
  dta <- dta[cutt_off==1, mean(outcome), by=run>0]
  dta <- setorderv(dta, 'run')
  round(unlist(dta[,2]), 2)
}

for (i in 1:length(bw.plot)){
  
  # PRE REFORM
  model1 <- felm(stdrd_home_ref_conv ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[1]][abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  model1_nonstd <- felm(home_ref_conv ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[1]][abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  models1[[i]] <- summary(model1)$coefficients[1,]
  control1[[i]] <- cutoff_mean(model1_nonstd)[1]
  treat1[[i]] <- cutoff_mean(model1_nonstd)[2]
  
  model2 <- felm(stdrd_resistance_conv ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[2]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  model2_nonstd <- felm(resistance_conv ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[2]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  models2[[i]] <- summary(model2)$coefficients[1,]
  control2[[i]] <- cutoff_mean(model2_nonstd)[1]
  treat2[[i]] <- cutoff_mean(model2_nonstd)[2]
  
  model3 <- felm(stdrd_LocDistPref_convocation ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[1]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  model3_nonstd <- felm(LocDistPref_convocation ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[1]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  models3[[i]] <- summary(model3)$coefficients[1,]
  control3[[i]] <- cutoff_mean(model3_nonstd)[1]
  treat3[[i]] <- cutoff_mean(model3_nonstd)[2]
  
  # POST REFORM
  model4 <- felm(stdrd_home_ref_conv ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | 0 | 0 | 0, data = datasets[[3]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  model4_nonstd <- felm(home_ref_conv ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | 0 | 0 | 0, data = datasets[[3]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  models4[[i]] <- summary(model4)$coefficients[2,]
  control4[[i]] <- cutoff_mean(model4_nonstd)[1]
  treat4[[i]] <- cutoff_mean(model4_nonstd)[2]
  
  model5 <- felm(stdrd_resistance_conv ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | 0 | 0 | 0, data = datasets[[4]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  model5_nonstd <- felm(resistance_conv ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | 0 | 0 | 0, data = datasets[[4]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  models5[[i]] <- summary(model5)$coefficients[2,]
  control5[[i]] <- cutoff_mean(model5_nonstd)[1]
  treat5[[i]] <- cutoff_mean(model5_nonstd)[2]
  
  model6 <- felm(stdrd_LocDistPref_convocation ~ I(SMD_margin_victory > 0) * SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | 0 | 0 | 0, data = datasets[[3]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  model6_nonstd <- felm(LocDistPref_convocation ~ I(SMD_margin_victory > 0) * SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | 0 | 0 | 0, data = datasets[[3]][ abs(SMD_margin_victory) < bw.plot[i]], keepX=T)
  models6[[i]] <- summary(model6)$coefficients[2,]
  control6[[i]] <- cutoff_mean(model6_nonstd)[1]
  treat6[[i]] <- cutoff_mean(model6_nonstd)[2] 
}

models <- lapply(c(models1, models2, models3, models4, models5, models6), function(x){as.data.frame(t(x))})
control <- lapply(c(control1, control2, control3, control4, control5, control6), function(x){as.data.frame(t(x))})
treat <- lapply(c(treat1, treat2, treat3, treat4, treat5, treat6), function(x){as.data.frame(t(x))})
models <- rbindlist(models, fill=T)
control <- rbindlist(control, fill=T)
treat <- rbindlist(treat, fill=T)

models[is.na(`Std. Error`), `Std. Error` := `Cluster s.e.`]
models$post <- as.factor(c(rep(' Pre-reform', nrow(models)/2), rep('Postreform', nrow(models)/2)))
models$bw <- rep(bw.plot, nrow(models)/length(bw.plot))
models$outcome <- c(rep('Local Information', 3), rep('Legislative Friction', 3), rep('Local Information', 3), rep('Local Information', 3), rep('Legislative Friction', 3), rep('Local Information', 3))
models[, col := ifelse(!duplicated(paste(post, bw, outcome)), 'Dictionary measure', 'ML measure')]
models[outcome=='Legislative Friction', col := 'Roll-call measure']
models$cimin <- models$Estimate-(1.96*models$`Std. Error`)
models$cimax <- models$Estimate+(1.96*models$`Std. Error`)
models$order <- rep(-c(1,3,5), 6)
models[col=='ML measure', order := order-.5]
models[, control_mean := gsub('0\\.', '.', round(control$V1, 2))]
models[, treat_mean := gsub('0\\.', '.', round(treat$V1, 2))]

dat_text <- data.frame(label = c("Proregime Behavior", "Antiregime Behavior", 'Antiregime Behavior', 'Proregime Behavior'),
                       outcome   = c('Legislative Friction', 'Legislative Friction', 'Local Information', 'Local Information'),
                       x     = c(0, 0, 0, 0), y     = c(-1.5, 1.5, -1.5, 1.5))

dat_rex <- data.frame(outcome   = c('Legislative Friction', 'Local Information'),
                      xmin=c(-Inf, -Inf), xmax=c(Inf, Inf), ymin=c(0, -Inf), ymax=c(Inf, 0))

TradeOff <- ggplot(data = models, aes(x=order, y=Estimate, ymin=cimin, ymax=cimax, col=col, shape=col)) + geom_errorbar(size=.8,  width = 0.1) + coord_flip() + theme_bw() + 
  geom_hline(yintercept = 0, lty=2) + scale_y_continuous(limits=c(-2.5,2.5)) + scale_x_continuous(limits=c(-6,0), breaks = -c(1.5,3,4.5), labels = bw.plot, position='top') +
  facet_grid(post ~ outcome, switch='y') + ylab('Effect of being marginally elected in a single-member district\n(standardized)') + xlab('Bandwidth') + geom_point() +
  geom_rect(data=dat_rex, inherit.aes = F, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax), fill = "gray80", alpha=.3) + 
  geom_text(data=dat_text, inherit.aes = F, aes(x=x, y=y, label=label), size=3, nudge_x = -.1) + 
  scale_shape_manual(values = c(19,17,15)) + 
  scale_color_manual(values=c('black', 'black', 'gray40')) +
  geom_text(inherit.aes=F, data=models, aes(x=order, y=Estimate, label=paste('(', models$treat_mean, ',', models$control_mean, ')', sep=''), vjust=0), nudge_x=ifelse(models$col=='ML measure', -.4, .2), size=2.5) +
  theme(text = element_text(family = "Times New Roman"), legend.title = element_blank(), legend.position = 'bottom', 
        strip.text = element_text(size=15), legend.text = element_text(size=15), 
        strip.background =element_rect(fill="white"))
TradeOff

ggsave(file="Figures/Figure4.tiff", TradeOff, width = 8, height = 6, device = "tiff", dpi = 100)




#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# Feb 17 2024
# Replication script for *appendix* in:
# Electoral Systems And The Autocrat’s Trade-Off: Evidence From The Russian Duma
# World Politics, vol. 76, no. 4, Oct 2024
#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Figure A1.A - Distribution of electoral system (lower chamber) for most recent election in current autocracies. 
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# DISTRIBUTION OF ELECTORAL SYSTEMS ACROSS AUTOCRACIES (MOST RECENT ELECTION)

electsys_dist <- Vdem[ year>2000 & country_name %in% Vdem[year==2019 & v2x_regime %in% 0:1][['country_name']], .SD, .SDcols=c('country_name', 'year', 'elec_system', 'v2x_regime') ]

electsys_dist <- electsys_dist[v2x_regime %in% 0:1] # trying to only include most recent election. See also !duplicated below.
setorderv(electsys_dist, c('country_name', 'year'))
electsys_dist <- electsys_dist[!is.na(elec_system)][!duplicated(country_name, fromLast = T)] # If several elections took place, take only 
electsys_dist <- electsys_dist[, .N, by='elec_system']
electsys_dist$share <- round(electsys_dist$N / sum(electsys_dist$N), 2)
setorder(electsys_dist, 'share')
electsys_dist[, order := 1:nrow(electsys_dist)]

distribution <- ggplot(electsys_dist, aes(x=order, y=share)) + geom_histogram(stat='identity') + theme_bw() + 
  coord_flip() + theme(axis.text = element_text(size=15), axis.title = element_text(size=15), panel.grid.major.y = element_blank(),) +
  xlab('') + ylab('Share of electoral autocracies') + 
  scale_x_continuous(breaks=electsys_dist$order, label=electsys_dist$elec_system) + 
  scale_y_continuous(limits=c(0, .4)) + geom_text(label=electsys_dist$share, hjust=-1)
distribution


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# FIGURE A1.B - CROSS-COUNTRY CHANGE IN SHARE W. SINGLE-MEMBER DISTRICTS
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

development <- Vdem_auto[v2xlg_elecreg==1, list(sum(grepl('(single-member|Parallel|First-past)', elec_system)), .N), by=c('year', 'group')]
development[, share := V1 / N]
development[year %in% c(1991, 2022), .SD, .SDcols=c(1,2,5)] %>% arrange(group, year)

development_plot <- ggplot(data=development, aes(x=year, y=share, col=group)) + geom_point() + ylim(.0, .75) + theme_bw() + 
  scale_color_manual('', values=c('darkgreen', 'gray70')) + 
  theme(axis.text = element_text(size=15), 
        axis.title = element_text(size=15), 
        legend.title = element_blank(),
        legend.text=element_text(size=15)) + xlab('') + ylab('Share with single-member\n district component') + 
  scale_x_continuous(breaks=seq(1990, 2020, 5))

development_plot

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Figure A2 - Speech Activity Across Period of Investigation
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


plot.dta <- utterance_dta[nchar(utterance)>60 & parliamentarian==TRUE & CHAR_ID!="Chairman"]
overall_mean <- plot.dta[nchar(utterance)>60 & parliamentarian==TRUE & CHAR_ID!="Chairman", 
                         uniqueN(utterance), by='date']

utterance_dta$sum <- 1
sum_day <- utterance_dta %>% filter(nchar(utterance)>60) %>% filter(parliamentarian==TRUE) %>%
  filter(CHAR_ID!="Chairman") %>% group_by(date) %>% summarise(total = sum(sum))
barchart <- utterance_dta[!duplicated(utterance_dta$kodz_vopr),]
barchart <- barchart %>% group_by(date) %>% summarise(total = sum(sum))
barchart$month <- ifelse(as.numeric(gsub("-", "", str_extract(as.Date(barchart$date), "-[0-9]{2}-")))>=6, "-03-15", "-09-15")
barchart$month <- paste(sub("-.*", "", barchart$date), barchart$month, sep="")

barchart <- barchart %>% group_by(month) %>% summarise(mean_day = mean(total))
p <- qplot(as.Date(date),total,data=sum_day) + stat_smooth()
plot.dta <- ggplot_build(p)$data[[2]]

describ_SUM <- ggplot(data=plot.dta, aes(x=as.Date(x), y=y)) + 
  geom_point(data=sum_day, inherit.aes = F, aes(x=as.Date(date), y=total), lwd=0.1, alpha=0.3, shape=1, colour="black") +
  geom_line(size=0.6) + theme_bw() + ylab("Utterances") + xlab("") + 
  theme(axis.text.x  = element_text(size = 20, angle = 60, vjust = 1.3, hjust=1.3), 
        axis.text.y = element_text(size = 20), axis.title=element_text(size=22)) +
  scale_x_date(date_breaks = "2 years", date_labels = "%Y", expand = c(0,0), limits = as.Date(c("1993-10-01", "2019-01-30"))) + 
  geom_ribbon(aes(x=as.Date(x), ymin=ymin, ymax=ymax), col="black", alpha=0, size=0.6, linetype = 2) + 
  geom_bar(data=barchart, aes(x=as.Date(month), y=mean_day), lwd=1, alpha=0.4, stat="identity") + 
  geom_hline(yintercept = overall_mean, linetype="dashed",size=1)
describ_SUM




# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Figure A3 - Speech Activity Across Deputies
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


Speech_dist_dta <- utterance_dta[full_alpha!="" & CHAR_ID!="Chairman", .N, by='full_alpha']
population_deputy <- setDT(left_join(population_deputy, Speech_dist_dta, by=c("full_alpha"="full_alpha"))) # merge into population_density is important. Though app. same nrow, some speakers in Speech_dist_dta will be ministers, and some in population_density will never have spoken.
population_deputy[is.na(N), N := 0] # if not present in Speech_dist_dta, deputies have never spoken in parliament
population_deputy$order <- as.numeric(c(nrow(population_deputy):1))

dist <- as.data.frame(matrix(NA, nrow=3, ncol=5)); names(dist) <- c("y", "Number" , "xend", "x", "label")
dist[, 4] <- rep(0, 3)
quantiles <- quantile(population_deputy$N, seq(0.01, 0.99, 0.01), na.rm=T)
dta <- data.frame(order = names(quantiles), N=quantiles)
dta$order <- as.numeric(gsub('[[:punct:]]', '', dta$order))

overview <- c(10, 100, 1000)
for (i in 1:3){
  dist[i, 1] <- overview[i]
  dist[i, 2] <- max(as.numeric(sub("%", "", names(quantiles)[quantiles<overview[i]+1])))/100
  dist[i,5] <- paste("< ", dist$y[i], " speeches: ", dist[i, 2]*100, "%", sep="")}
dist[, 3] <- round(rep(nrow(Speech_dist_dta), 3) * dist$Number, 0)
dist$x <- rep(850, 3)

Speech_dist <- ggplot(dta, aes(x=order, y=N)) + 
  geom_histogram(stat = "identity") + theme_bw() + xlab("Percent of Deputies") + ylab("Speech frequency")+
  scale_y_log10(breaks=c(0, 3, 10, 30, 100, 300, 1000)) + 
  theme(panel.grid.minor = element_blank(),
        text = element_text(size=20, colour = "black"), 
        axis.text.y = element_text(size=20), axis.title=element_text(size=22)) +
  geom_segment(data=dist, inherit.aes=F, aes(x=0, y=y, yend=y, xend=100), linetype="dashed") +
  geom_text(data=dist, inherit.aes=F, aes(x=15, y=y+c(2, 20, 200), label=label), size=5)
Speech_dist


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A1 - Differences between dual included deputies and deputies running only on one list
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
vars <- c('woman', 'age', 'seniority', 'PRFedReg_num', 'district_size', 'urban', 'PR_rank') # obs: if changing variables, change '\\\\multicolumn.*{x}'
labels <- c('Woman', 'Age', 'Seniority' , 'Federal (PR)', 'District Size', 'Urban', 'Rank (PR)')

generalization <- utterance_dta
generalization[, SMD_margin_victory := sum(gov_votemargin, na.rm = T) / .N, by=c('full_name', 'convocation')]
generalization[, PR_rank := as.numeric(gsub('[^[:digit:]]', '', PR_rank))]
generalization[, speech_activity := sum(.N),  by=c('full_name', 'convocation')]
generalization <- generalization[!duplicated(paste(full_alpha, convocation)) & parliamentarian==T]
generalization[, woman := ifelse(gender=='female', T, F)]
generalization[, seniority := seniority / 365]
generalization[, urban := grepl('g\\. ', tolower(gov_region))][is.na(urban), urban := F]
generalization[, PRFedReg_num := ifelse(PR_constituency=='federal', 0, 1)]
generalization[, district_size := SMD_tot_countCandidates/1000]
generalization[is.na(dual_candidate), dual_candidate := F]

models <- list()
for (i in 1:length(vars)){
  generalization[, y := generalization[[vars[i]]] ]
  models[[i]] <- felm(y~dual_candidate | convocation | 0 | CHAR_ID, data=generalization)}

inference <-  capture.output(stargazer(models, header = F, covariate.labels = c('Dual Included'),
                                       digits = 3, title='Differences between dual included deputies and deputies running only on one list', digits.extra = 3,
                                       notes = '\\parbox[t]{.7\\textwidth}{$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01.  All models regress dual included status (yes/no) on deputy characteristic. Intercept omitted from table. District size proxied as total votes in the district.District number is used for district location. Seniority is measured as years since first day in parliament. PR regional sublist indicates whether the deputy is registered on the federal part of the PR list.}',  notes.append = F, notes.align='l', column.sep.width = "-3pt", style='apsr',
                                       label = 'fig:inference', omit.stat = c('adj.rsq', 'ser', 'rsq', 'f'), font.size = 'scriptsize' ))
inference <- paste(inference, sep='', collapse = '')
inference <- gsub('\\\\multicolumn.*y}', gsub('^\\&', '', paste(labels, collapse = '\\&')), inference)

write(inference, file='tables/TA1.tex')


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### Figure A4 - Political competition in the districts
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

sorting_dta <- utterance_dta[!duplicated(paste(full_alpha, convocation)) & dual_candidate == T]
h  = hist(sorting_dta$SMD_margin_victory, breaks = 100)

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
plot(h, xlab="Margin of Victory", ylab='Frequency', main="", 
     ylim=c(0,50), xlim=c(-1, 1), xaxt='n', cex.axis=1.6, cex=1.6, cex.lab=1.6)
axis(side = 1, at=c(-1, -.5, 0, .5, 1), cex.axis=1.6, cex.lab=1.6)
abline(v=0, lwd=2)
abline(h=seq(0, 50, 10), lwd=.6, lty=c(1, rep(3, 5)))



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A3 - Balance on Observables
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
utterance_dta <- fread("data/speeches.csv", encoding = 'UTF-8', na.strings = c('NA', ''))
sorting_dta <- utterance_dta
sorting_dta[, gov_votemargin := sum(gov_votemargin, na.rm = T) / .N, by=c('full_name', 'convocation')]
sorting_dta[, gov_seniority2 := sum(gov_seniority, na.rm = T) / .N, by=c('full_name', 'convocation')]
sorting_dta <- sorting_dta[!duplicated(paste(full_alpha, convocation)) & dual_candidate == T] # AGGREGATIN TO CONVOCATION LEVEL

sorting_dta[is.na(PR_rank), PR_rank := rank]
sorting_dta[, PR_rank := as.numeric(gsub('[^[:digit:]]', '', PR_rank))]

sorting_dta[district_constituency=='', district_constituency := NA]
sorting_dta[, district_constituency := as.numeric(str_extract(sorting_dta$district_constituency, '[[:digit:]]{1,}'))]
sorting_dta[gender=='', gender := NA]
sorting_dta[, woman := ifelse(gender=='female', T, F)]

sorting_dta[, seniority := log(seniority+1)]
sorting_dta[, United_Russia := fraction=='ЕР']
sorting_dta[, urban := grepl('g\\. ', tolower(gov_region))][is.na(urban), urban := F]
sorting_dta[, PRFedReg_num := ifelse(PR_constituency=='federal', 0, 1)]
sorting_dta[, district_size := SMD_tot_countCandidates/1000]
sorting_dta[, votemargin := SMD_result/SMD_tot_countCandidates]
sorting_dta[, fraud_1stdDigit := gsub('(.*\\.)([[:digit:]]{1})(.*)', '\\2', votemargin)]
sorting_dta[, fraud_2ndDigit := gsub('(.*\\.[[:digit:]]{1})([[:digit:]]{1})(.*)', '\\2', votemargin)]
sorting_dta[, rural_pop := rural_pop / 1000]

sorting_dta$age <- as.numeric(as.Date(sorting_dta$date)-as.Date(sorting_dta$birthdate, format="%d.%m.%Y"))/365 # AGE

sorting_dta[sorting_dta==''] <- NA
outcomes <- c('seniority', 'district_size', 'age', 'PR_rank', 'district_constituency', 'woman', 'PRFedReg_num', 'United_Russia', 'rural_pop',  'urban', 'gov_votemargin', 'gov_seniority2', 'fraud_2ndDigit') #
names <- c('Seniority', 'District votes (k)', 'Age', 'Rank, PR list', 'District location', 'Woman', 'PR regional sublist', 'United Russia', 'Rural population (k, region)', 'Registered in Moscow or Sct. Petersburg', 'Governor vote margin', 'Governor seniority', 'Electoral fraud')
#cbind(outcomes, names)

bw2 <- c(1, bw)
bw.label2 <- paste(paste(bw2, sep='', collapse = ' \\& '), sep='')


models <- list()
stargazer.table <- list()
for (D in seq_along(outcomes)){
  for (i in 1:length(bw2)){
    loopDta <- sorting_dta[abs(SMD_margin_victory) <= bw2[i] ] # add & fraction!='ЕР'
    models[[i]] <- felm( loopDta[[ outcomes[D] ]] ~ I(SMD_margin_victory>0)*SMD_margin_victory | convocation | 0 | treat_assign, data=loopDta, psdef=FALSE)}
  
  stargazer.table[[D]] <- capture.output(stargazer(models, header = F, omit = '(y$|\\)$|t$)', covariate.labels = c('$SMD$'),
                                                   digits = 2, title='Balance on Observables',
                                                   digits.extra = 3, dep.var.labels = paste('\\textbf{', names[D], '}', sep=''),
                                                   notes = '\\parbox[t]{.6\\textwidth}{$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. All models of the form
                                                   $y= SMD + MV + SMD*MV$. Only the first term is reported. District size proxied as total votes in the district.
                                                   District number is used for district location. Seniority is measured as the log of days since first day in parliament. 
                                                   PR regional sublist indicates whether the deputy is registered on the federal part of the PR list.
                                                   Governor vote margin is the vote share of the respective governor. Electoral fraud is proxied as the 2nd decimal digit of the deputy\'s vote margin.
                                                   Std errors clustered at the deputy-convocation level. Fixed effects at the convocation level.}',
                                                   notes.append = F, notes.align='l', column.sep.width = "10pt", model.numbers = T, style='apsr',
                                                   label = 'fig:balance', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\)', paste('Bandwidth &', bw.label2, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{4.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- paste('\\hline \\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(outcomes)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{5.*', '\\1', stargazer.table[[D]])}
}

write(gsub('\\[-3ex\\]', '[-5ex\\]', paste(stargazer.table, sep='', collapse = '')),
      file='tables/TA3.tex')



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A4 - Home region visits
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


datasets <- list()
datasets[[1]] <- sumFlights[ (dual_candidate==T )]
datasets[[2]] <- sumFlights[ !is.na(SMD_margin_victory)]

bw_appendix <- c(0.65,0.5,0.35,0.25,0.15)
bw.label_appendix <- paste(' $\\\\Delta\\\\mu$  \\&  ', paste(bw_appendix, sep='', collapse = ' \\& '), sep='')

stargazer.table <- list(); Diff <- list()
for (D in 1:length(datasets)){
  models <- list()
  sd <- NULL; mean <- NULL
  for (i in 1:length(bw_appendix)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i]][['HomeRegion_tot']]))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i] & list_numeric == 0][['HomeRegion_tot']]))
    
    datasets[[D]][, X := I(SMD_margin_victory>0)]
    Diff[[i]] <- felm(HomeRegion_tot ~  X +gender+age+total_trips| 0 | 0 | 0, data = datasets[[D]] )
    models[[i]] <- felm(HomeRegion_tot ~  X*SMD_margin_victory+gender+age+total_trips | 0 | 0 | 0, data = datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i]]  )}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|Constant|trips|age|gender)', covariate.labels = c('$SMD$'),
                                                   digits = 1, title='Home Region Visits', digits.extra = 1, dep.var.labels = table.name[D],
                                                   notes = '\\parbox[t]{.5\\textwidth}{$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. We construct a data set of the complete flights purchased by Russian deputies to or from the region containing the district they were elected in (district deputies) or were initially registered in (party list deputies) between April 2004 and November 2006. This data is made available by the flight reservation company Sirena Travel and kindly shared by David Szakonyi. Constituency visits is the sum of departures to and from the federal region in which the deputy was elected. As the largest country in the world measured in landmass, constituency visits are a time consuming business in Russia. Even though flight history only presents one means of transportation, it plausibly picks up the majority of home visits. 
                        Models in column 2:7 of the form $y= SMD + MV + SMD*MV$, but only the first term is reported. Models control for age, gender, and total trips taken. Due to the lower number of observations (data is only available in the fourth convocation from 2004-2006), results are only statistically significant when all district deputies are added in addition to dual included deputies, thereby providing more statistical power. However, the coefficient is always positive.  
                        Column 1 is the difference in means for bw=1.}',
                                                   notes.append = F, notes.align='l', column.sep.width = "-2pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('\\\\[-1.5ex] \\hline \\\\[-1.5ex] $\\sigma_y$', paste(round(c(sd[1], sd, 1)))), 
                                                                    c('$\\bar{y}_0$', paste(round(c(mean[1], mean, 1))))),
                                                   label = 'fig:effectflight', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\) \\& \\(5\\) \\& \\(6\\)', paste('Bandwidth &', bw.label_appendix, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{6.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{7.*', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
}


write(gsub('\\[-4ex\\]', '[-5ex\\]', gsub('\\[.5ex\\]', '[-2ex\\]', gsub('\\[-1.5ex\\]', '[-2.5ex\\]', gsub('\\[-3ex\\]', '[-4ex\\]', paste(stargazer.table, sep='', collapse = ''))))),
      file='tables/TA4.tex')


# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A5: Effort
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


datasets <- list()
datasets[[1]] <- utterance_dta[  !duplicated(treat_assign) & (dual_candidate==T)]
datasets[[2]] <- utterance_dta[  !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian')]
datasets[[3]] <- sumFlights[   (dual_candidate==T)]
datasets[[4]] <- sumFlights[  (dual_candidate==T | !is.na(SMD_margin_victory))]

vars <- c('NoSpeeches_conv', 'NoSpeeches_conv', 'total_trips', 'total_trips')
table.name3 <- c('Speech Effort (dual included deputies)', 'Speech Effort (dual included and all district deputies', 
                 'Total Flights (dual included deputies)', 'Total Flights (dual included and all district deputies')

stargazer.table <- list(); Diff <- list()
for (D in 1:length(datasets)){
  models <- list()
  sd <- NULL; mean <- NULL
  for (i in 1:length(bw_appendix)){
    datasets[[D]][, Y := datasets[[D]][[vars[D]]] ]
    if (D %in% c(1,2)){
      Diff[[i]] <- felm(Y ~  I(SMD_margin_victory>0) + gender + age + log(seniority+1) | 0 | 0 | treat_assign, data = datasets[[D]] )
      models[[i]] <- felm(Y ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) | convocation | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i]]  )}else{
        Diff[[i]] <- felm(Y ~  I(SMD_margin_victory>0) + gender + age | 0 | 0 | 0, data = datasets[[D]]  )
        models[[i]] <- felm(Y ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age | 0 | 0 | 0, data = datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i]]  ) }}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|Constant|trips|age|gender)', covariate.labels = c('$SMD$'),
                                                   digits = 2, title='General Effort', digits.extra = 3, dep.var.labels = table.name3[D],
                                                   notes = '\\parbox[t]{.55\\textwidth}{$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01.
                        Models in column 2:7 of the form $y= SMD + MV + SMD*MV$, but only the first term is reported. Models control for age and gender. 
                        Column 1 is the difference in means for bw=1. Dependent variable is the number of speeches across the whole convocation and total flight trips taken. For description of flight information, see Table \\ref{fig:effectflight}.
                                                   Std.errors clustered at the deputy-convocation level (i.e. level of treatment assignment). }',
                                                   notes.append = F, notes.align='l', column.sep.width = "0pt", model.numbers = T, style='apsr', 
                                                   label = 'fig:activity', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\) \\& \\(5\\) \\& \\(6\\)', paste('Bandwidth &', bw.label_appendix, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{6.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{7.*', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
}

write(gsub('\\[-3ex\\]', '[-4ex\\]', paste(stargazer.table, sep='', collapse = '')),
      file='tables/TA5.tex')


###############################################################################
# Figure A6 - Political competition in the districts
###############################################################################

SMD[, tot_countCandidates := sum(result),  by=list(district, election)]
SMD[, sd.district.result:= sd(result) / tot_countCandidates, by=list(district, election)]
SMD[, winner_count:= max(result),          by=list(district, election)]
SMD[, elected:= result==winner_count ]
SMD[elected==F, runnerup_count := max(result), by=list(district, election)]
SMD[, runnerup := result==runnerup_count, by=list(district, election)]
SMD[, runnerup_count := mean(runnerup_count, na.rm=T), by=list(district, election)] # adding runnerup_count to district winner (excluded above). All values the same so mean gives value

# IF CANDIDATE LOST: MARIGN OF VICTORY DEFINES AS DISTANCE TO WINNER *in percentage points*
SMD[elected==F, margin_victory := (result - winner_count )  / tot_countCandidates] # 
SMD[elected==T, margin_victory := (result - runnerup_count) / tot_countCandidates] # 
SMD[elected==T , median(margin_victory)]

number <- SMD[margin_victory>=-.05 & margin_victory<=0.05, .N]   # 525
(number/nrow(SMD))*100                                           # 5.04 pct of all candidates
(number/length(unique(paste(SMD$district, SMD$election))))*100   # 46.7 pct of all candidates
number <- SMD[margin_victory>=-.1 & margin_victory<=0.1, .N]     # 1187
(number/nrow(SMD))*100                                           # 11.4 pct of all candidates

breaks = c(-.1, -.05, 0.05, 0.1)
h = hist(SMD$margin_victory, breaks=80,plot=FALSE)
ccat = cut(h$breaks, c(-Inf, breaks, Inf))

#pdf('MarginVictory_dist.pdf')
plot(h, col=c("white","gray70","gray40", "gray70","white")[ccat], xlab="Margin of Victory (all SMD candidates)", ylab='Frequency', main="", 
     ylim=c(0,600), xlim=c(-1, 1), xaxt='n', cex.axis=1.5, cex=1.5, cex.lab=1.5)
axis(side = 1, at=c(-1, -.5, 0, .5, 1), cex.axis=1.2)
axis(1, breaks, line=-0.7, tck = -0.02, labels = NA)
axis(side = 1, c(-0.14, 0.14), labels=c('-0.1', '0.1'), lwd = 0, line = -.9, cex.axis=1.2)
legend('right', legend=c('5.0 pct (N=1187)', '11.4 pct (N=1187)'), fill=c('gray40', 'gray70'), bty = "n", cex=1.5)
#dev.off()

SMD2 = SMD[elected == T | runnerup == T]
breaks = c(-.1, -.05, 0.05, 0.1)
h = hist(SMD2$margin_victory, breaks=80,plot=FALSE)
ccat = cut(h$breaks, c(-Inf, breaks, Inf))

#pdf('MarginVictory_distTopRunner.pdf')
plot(h, col=c("white","gray70","gray40", "gray70","white")[ccat], xlab="Margin of Victory (top-two candidates)", ylab='', main="", 
     ylim=c(0,600), xlim=c(-1, 1), xaxt='n', cex.axis=1.5, cex=1.5, cex.lab=1.5, yaxt='n')
axis(side = 1, at=c(-1, -.5, 0, .5, 1), cex.axis=1.2)
axis(1, breaks, line=-0.7, tck = -0.02, labels = NA)
axis(side = 1, c(-0.14, 0.14), labels=c('-0.1', '0.1'), lwd = 0, line = -.9, cex.axis=1.2)
legend('right', legend=c('18.7 pct (N=420)', '35.8 pct (N=804)'), fill=c('gray40', 'gray70'), bty = "n", cex=1.5)
#dev.off()




# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A6: PARTY FIXED EFFECTS
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

table.name2 <- c('Local rep. (Dual only)',
                 'Local rep. (Dual and district)',
                 'Local rep. (2nd, dual only)',
                 'Local rep. (2nd, dual and district)',
                 'Law obstruction (Dual only)',
                 'Law obstruction (Dual and district)')
dependent <- c('home_ref_conv', 'home_ref_conv', 'LocDistPref_convocation', 'LocDistPref_convocation', 'resistance_conv', 'resistance_conv')

datasets <- list()
datasets[[1]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T)  & convocation<5  ]
datasets[[2]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5 ]
datasets[[3]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5 ]
datasets[[4]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5 ]
voting_w_MV[ , fraction := faction]
datasets[[5]] <- voting_w_MV[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5 ] 
datasets[[6]] <- voting_w_MV[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5 ] 


stargazer.table <- list()
for (D in 1:length(datasets)){
  models <- list(); sd <- NULL; mean <- NULL; unique.dep <- NULL; PR <- NULL
  datasets[[D]][, Y := datasets[[D]][[dependent[D]]] ]
  
  mean <- c(mean, mean(datasets[[D]][list == 'PR'][['Y']], na.rm=T))
  sd <- c(sd, sd(datasets[[D]][['Y']], na.rm=T))
  
  for (i in 1:length(bw_appendix)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i]][['Y']], na.rm=T))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i] & list == 'PR'][['Y']], na.rm=T))
    unique.dep <- c(unique.dep, datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i] &!is.na(gov_votemargin), uniqueN(treat_assign)])
    
    Diff[[i]] <- felm(Y ~  I(SMD_margin_victory>0) + gender + age + log(seniority+1) + PRFedReg_num | fraction | 0 | treat_assign, data = datasets[[D]] )
    
    models[[i]] <- felm(Y ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num | fraction | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i]])}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|Constant|trips|age|gender|PRFedReg_num|United_Russia)', covariate.labels = c('SMD'),
                                                   digits = 3, title='Main results with party fixed effects', 
                                                   digits.extra = 3, dep.var.labels = table.name2[D],
                                                   notes = '\\parbox[t]{.7\\textwidth}{$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01.
                        From second column all models of the form $R_{ic}= \\beta_1 SMD_{ic} + \\beta_2 MV_{ic} + \\beta_3 SMD_{ic} \\times MV_{ic} +\\varepsilon$, but only the first term is reported. Models control for age, gender, seniority, and regional or nationwide PR mandate.
                        Column 1 is the difference in means for $bw=1$ including controls. Dependent variable is the average number of local references in text (dictionary measure) and the share of speeches containing local reference (ml measure). Sample 1 includes only dual included deputies, Sample 2 additionally includes all majoritarian candidates.
                                                   Std.errors clustered at the deputy-convocation level, and convocation fe. For further information on the machine learning measure, see Section \\ref{app:ML}.
                                                   }',
                                                   notes.append = F, notes.align='l', column.sep.width = "10pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('$\\bar{y}_0$', paste(round(mean, 3))), 
                                                                    c('$\\sigma_y$', paste(round(sd, 3)))),
                                                   label = 'partyFE', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  if(D==1){stargazer.table[[D]] <- paste(stargazer.table[[D]][-10], sep='', collapse = '')}
  if(D>1){stargazer.table[[D]] <- paste(stargazer.table[[D]][-c(9:10)], sep='', collapse = '')}
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\) \\& \\(5\\) \\& \\(6\\)', paste('Bandwidth &', bw.label_appendix, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{6.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  stargazer.table[[D]] <- sub('$\\bar{y}_0$', '\\hline $\\bar{y}_0$', stargazer.table[[D]], fixed=T)
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{7.*', '\\1', stargazer.table[[D]])}} # OBS: change bw, change here


write(paste(stargazer.table, sep='', collapse = ''),
      file='tables/TA6.tex')



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A7 - Main results excluding observations within 0.05 of the threshold
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


table.name2 <- c('Local rep. (Dual only)',
                 'Local rep. (Dual and district)',
                 'Local rep. (2nd, dual only)',
                 'Local rep. (2nd, dual and district)',
                 'Law obstruction (Dual only)',
                 'Law obstruction (Dual and district)')
dependent <- c('home_ref_conv', 'home_ref_conv', 'LocDistPref_convocation', 'LocDistPref_convocation', 'resistance_conv', 'resistance_conv')

datasets <- list()
datasets[[1]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5 & abs(SMD_margin_victory) > .05]
datasets[[2]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5  & abs(SMD_margin_victory) > .05]
datasets[[3]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5  & abs(SMD_margin_victory) > .05]
datasets[[4]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5  & abs(SMD_margin_victory) > .05]
datasets[[5]] <- voting_w_MV[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5  & abs(SMD_margin_victory) > .05] 
datasets[[6]] <- voting_w_MV[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5  & abs(SMD_margin_victory) > .05] 


stargazer.table <- list()
for (D in 1:length(datasets)){
  models <- list(); sd <- NULL; mean <- NULL; unique.dep <- NULL; PR <- NULL
  datasets[[D]][, Y := datasets[[D]][[dependent[D]]] ]
  
  mean <- c(mean, mean(datasets[[D]][list == 'PR'][['Y']], na.rm=T))
  sd <- c(sd, sd(datasets[[D]][['Y']], na.rm=T))
  
  for (i in 1:length(bw_appendix)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i]][['Y']], na.rm=T))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i] & list == 'PR'][['Y']], na.rm=T))
    unique.dep <- c(unique.dep, datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i] &!is.na(gov_votemargin), uniqueN(treat_assign)])
    
    Diff[[i]] <- felm(Y ~  I(SMD_margin_victory>0) + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]] )
    
    models[[i]] <- felm(Y ~  I(SMD_margin_victory>0)*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw_appendix[i]])}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|Constant|trips|age|gender|PRFedReg_num|United_Russia)', covariate.labels = c('SMD'),
                                                   digits = 3, title='Main results excluding observations within 0.05 of the threshold', 
                                                   digits.extra = 3, dep.var.labels = table.name2[D],
                                                   notes = '\\parbox[t]{.7\\textwidth}{$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. \\\\ All models of the form 
                                                   $y= SMD + MV + GOV + SMD*MV*GOV$, but only the first term is reported. Std.errors clustered at the deputy-convocation level (i.e. level of treatment assignment). 
                                                   Convocation effects at the party level.}',
                                                   notes.append = F, notes.align='l', column.sep.width = "10pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('$\\bar{y}_0$', paste(round(mean, 3))), 
                                                                    c('$\\sigma_y$', paste(round(sd, 3)))),
                                                   label = 'donutrdd', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  if(D==1){stargazer.table[[D]] <- paste(stargazer.table[[D]][-10], sep='', collapse = '')}
  if(D>1){stargazer.table[[D]] <- paste(stargazer.table[[D]][-c(9:10)], sep='', collapse = '')}
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\) \\& \\(5\\) \\& \\(6\\)', paste('Bandwidth &', bw.label_appendix, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{6.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  stargazer.table[[D]] <- sub('$\\bar{y}_0$', '\\hline $\\bar{y}_0$', stargazer.table[[D]], fixed=T)
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{7.*', '\\1', stargazer.table[[D]])}} # OBS: change bw, change here


write(paste(stargazer.table, sep='', collapse = ''),
      file='tables/TA7.tex')




# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A8 - Local representation ML measure - EXLUDING THE 1990s
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

datasets <- list()
datasets[[1]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5 & year > 1999]; datasets[[1]][, home_ref_conv := LocDistPref_convocation] # year > 1999 &
datasets[[2]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5 & year > 1999]; datasets[[2]][, home_ref_conv := LocDistPref_convocation] #year > 1999 &
table.name.expand <- c('Sample 1', 'Sample 2')

for (D in 1:length(datasets)){
  print(rdbwselect(y=datasets[[D]][['home_ref_conv']], x=datasets[[D]][['SMD_margin_victory']],  
                   c = 0, bwselect = "mserd")$bws[1])}

stargazer.table <- Diff <- list()
for (D in 1:length(datasets)){
  models <- list()
  sd <- mean <- NULL
  for (i in 1:length(bw)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw[i]][['home_ref_conv']], na.rm=T))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw[i] & list_numeric == 0][['home_ref_conv']], na.rm=T))
    
    datasets[[D]][, X := I(SMD_margin_victory>0)]
    Diff[[i]] <- felm(home_ref_conv ~  X  + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]] )
    models[[i]] <- felm(home_ref_conv ~  X*SMD_margin_victory + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw[i]]  )}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|seniority|votemargin|urban|Constant|trips|age|gender|PRFedReg_num|United_Russia|seniority)', covariate.labels = c('$SMD$'),
                                                   digits = 2, title='Local information I: ML outcome (excl. 1990s)', digits.extra = 1, dep.var.labels = table.name.expand[D],
                                                   
                                                   notes.append = F, notes.align='l', column.sep.width = "-2pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('\\\\[-1.5ex] \\hline \\\\[-1.5ex] $\\sigma_y$', paste(round(c(sd[1], sd), 2))), 
                                                                    c('$\\bar{y}_0$', paste(round(c(mean[1], mean), 2)))),
                                                   label = 'fig:locrep_ML_worldpolitics_1990', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\)', paste('Bandwidth &', bw.label, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{4.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{5.*', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
}
stargazer.table <- gsub('\\multicolumn{5}{l}{$^{*}$p $<$ .1; $^{**}$p $<$ .05; $^{***}$p $<$ .01}', '', stargazer.table, fixed=T)
notes = '$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. From second column all models of the form $R_{ic}= \\beta_1 SMD_{ic} + \\beta_2 MV_{ic} + \\beta_3 SMD_{ic} \\times MV_{ic} +\\varepsilon$, but only the first term is reported. Models control for age, gender, seniority, regional or nationwide PR mandate, and United Russia affilation. Column 1 is the difference in means for $bw=1$ including controls. Dependent variable is the share of speeches containing local reference according to the machine learning classifier. Sample 1 includes only dual included deputies, Sample 2 additionally includes all majoritarian candidates. Std.errors clustered at the deputy-convocation level, and convocation fe. For further information on the machine learning measure, see Section \\ref{app:ML}. Optimal bandwidths \\citep{calonico2014robust} from top to bottom are: 0.16 and 0.16'

write(gsub('\\[-4ex\\]', '[-5ex\\]', gsub('\\[.5ex\\]', '[-2ex\\]',  gsub('\\[-1.5ex\\]', '[-2.5ex\\]', gsub('\\[-3ex\\]', '[-4ex\\]', 
                                                                                                             paste(gsub(stargazer.table, pattern='\\end{tabular}',
                                                                                                                        replacement=paste('\\end{tabular}', '\n',
                                                                                                                                          '\\centerline{\\begin{minipage}{0.95\\textwidth}~\\', '\n',
                                                                                                                                          '\\footnotesize{' , notes, 
                                                                                                                                          '} \\end{minipage}}',  sep=''), fixed=T), sep='', collapse = ''))))), file='tables/TA8.tex')



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A9 - Voting Obstruction - Excluding the 1990s
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

datasets <- list()
datasets[[1]] <- voting_w_MV[ !duplicated(treat_assign) & dual_candidate==T & convocation<5 & year > 1999] #  & 
datasets[[2]] <- voting_w_MV[ !duplicated(treat_assign) & (dual_candidate==T | !is.na(SMD_margin_victory)) & convocation<5 &  year > 1999] #  year > 1999 & 

for (D in 1:length(datasets)){
  print(rdbwselect(y=datasets[[D]][['resistance_conv']], x=datasets[[D]][['SMD_margin_victory']],  
                   c = 0, bwselect = "mserd")$bws[1])}

stargazer.table <- list(); Diff <- list()
for (D in 1:length(datasets)){
  models <- list()
  sd <- NULL; mean <- NULL
  for (i in 1:length(bw)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw[i]][['resistance_conv']], na.rm=T))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw[i] & list_numeric == 0][['resistance_conv']], na.rm=T))
    
    datasets[[D]][, X := I(SMD_margin_victory>0)]
    Diff[[i]] <- felm(resistance_conv ~  X +  + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | 0 | 0 | treat_assign, data = datasets[[D]] )
    models[[i]] <- felm(resistance_conv ~  X + gender + age + log(seniority+1) + PRFedReg_num + United_Russia | convocation | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw[i]]  )}
  
  stargazer.table[[D]] <- capture.output(stargazer(list(Diff[[1]], models), header = F, omit = '(y$|\\)$|Intercept|Constant|trips|age|gender|PRFedReg_num|United_Russia|seniority)', covariate.labels = c('$SMD$'),
                                                   digits = 2, title='Legislative friction (excl. 1990s)', digits.extra = 3, dep.var.labels = table.name[D],
                                                   notes.append = F, notes.align='l', column.sep.width = "0pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('\\\\[-1.5ex] \\hline \\\\[-1.5ex] $\\sigma_y$', paste(round(c(sd[1], sd), 2))), 
                                                                    c('$\\bar{y}_0$', paste(round(c(mean[1], mean), 2)))),
                                                   label = 'fig:voteobstr_1990', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('\\& \\(1\\) \\& \\(2\\) \\& \\(3\\) \\& \\(4\\)', paste('Bandwidth &', bw.label, '\\\\\\\\[.5ex]'), stargazer.table[[D]])}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{4.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{5.*', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
}
stargazer.table <- gsub('\\multicolumn{5}{l}{$^{*}$p $<$ .1; $^{**}$p $<$ .05; $^{***}$p $<$ .01}', '', stargazer.table, fixed=T)
notes = '$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. From second column all models of the form $R_{ic}= \\beta_1 SMD_{ic} + \\beta_2 MV_{ic} + \\beta_3 SMD_{ic} \\times MV_{ic} +\\varepsilon$, but only the first term is reported. Models control for age, gender, seniority, regional or nationwide PR mandate, and United Russia affilation. Column 1 is the difference in means for $bw=1$ including controls. Dependent variable is the share of voting absence out of all votes across the whole convocation. Sample 1 includes only dual included deputies, Sample 2 additionally includes all majoritarian candidates. Std.errors clustered at the deputy-convocation level. Optimal bandwidths \\citep{calonico2014robust} from top to bottom are: 0.16 and 0.15.'

write(gsub('\\[-4ex\\]', '[-5ex\\]', gsub('\\[.5ex\\]', '[-2ex\\]',  gsub('\\[-1.5ex\\]', '[-2.5ex\\]', gsub('\\[-3ex\\]', '[-4ex\\]', 
                                                                                                             paste(gsub(stargazer.table, pattern='\\end{tabular}',
                                                                                                                        replacement=paste('\\end{tabular}', '\n',
                                                                                                                                          '\\centerline{\\begin{minipage}{0.95\\textwidth}~\\', '\n',
                                                                                                                                          '\\footnotesize{' , notes, 
                                                                                                                                          '} \\end{minipage}}',  sep=''), fixed=T), sep='', collapse = ''))))), file='tables/TA9.tex')



# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table A12
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
utterance_dta <- fread("data/speeches.csv", encoding = 'UTF-8', na.strings = c('NA', ''))

bw.label_modified <- gsub('$\\\\Delta\\\\mu$', '', fixed=T, bw.label)
bw.label_modified <- gsub('^\\s{1,}\\\\&', '', bw.label_modified)
bw.label_modified <- gsub('\\*', '', bw.label_modified)
bw.label_modified <- gsub('\\\\', '', bw.label_modified)

table.name2 <- c('Local rep. (Dual only)',
                 'Local rep. (Dual and district)',
                 'Local rep. (2nd, dual only)',
                 'Local rep. (2nd, dual and district)',
                 'Law obstruction (Dual only)',
                 'Law obstruction (Dual and district)')
dependent <- c('home_ref_conv', 'home_ref_conv', 'LocDistPref_convocation', 'LocDistPref_convocation', 'resistance_conv', 'resistance_conv')
utterance_dta[, governor := log(gov_votemargin+1)]
voting_w_MV[, governor := log(gov_votemargin+1)]

datasets <- list()
datasets[[1]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5]
datasets[[2]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5]
datasets[[3]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5]
datasets[[4]] <- utterance_dta[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5]
datasets[[5]] <- voting_w_MV[ !duplicated(treat_assign) & (dual_candidate==T) & convocation<5]
datasets[[6]] <- voting_w_MV[ !duplicated(treat_assign) & (dual_candidate==T | list=='majoritarian') & convocation<5]

stargazer.table <- list()
for (D in 1:length(datasets)){
  models <- list(); sd <- NULL; mean <- NULL; unique.dep <- NULL; PR <- NULL
  datasets[[D]][, dependent := datasets[[D]][[dependent[D]]] ]
  
  for (i in 1:length(bw)){
    sd <- c(sd, sd(datasets[[D]][ abs(SMD_margin_victory) < bw[i]][['dependent']], na.rm=T))
    mean <- c(mean, mean(datasets[[D]][ abs(SMD_margin_victory) < bw[i] & list == 'PR'][['dependent']], na.rm=T))
    
    models[[i]] <- felm(dependent ~  I(SMD_margin_victory>0)*SMD_margin_victory*governor | convocation | 0 | treat_assign, data = datasets[[D]][ abs(SMD_margin_victory) < bw[i]])}
  
  stargazer.table[[D]] <- capture.output(stargazer(models, header = F, keep = c(1,5), covariate.labels = c('SMD', 'SMD:log(Governor voteshare)'),
                                                   digits = 3, title='List affiliation modified by governor vote margin', 
                                                   digits.extra = 3, dep.var.labels = table.name2[D],
                                                   notes = '\\parbox[t]{.4\\textwidth}{$Notes$: *p$<$.1; **p$<$.05; ***p$<$.01. \\\\ All models of the form 
                                                   $y= SMD + MV + GOV + SMD*MV*GOV$. Std errors clustered at the deputy-convocation level. 
                                                   Fixed effects at the convocation level.}',
                                                   notes.append = F, notes.align='l', column.sep.width = "10pt", model.numbers = T, style='apsr', 
                                                   add.lines = list(c('$\\bar{y}_0$', paste(round(mean, 3))), 
                                                                    c('$\\sigma_y$', paste(round(sd, 3)))),
                                                   label = 'governor', omit.stat = c('adj.rsq', 'ser', 'rsq'), font.size = 'scriptsize' ))
  stargazer.table[[D]] <- paste(stargazer.table[[D]], sep='', collapse = '')
  if(D==1){stargazer.table[[D]] <- sub('& (1) & (2) & (3)', paste('Bandwidth &', bw.label_modified), stargazer.table[[D]], fixed=T)}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('& (1) & (2) & (3)\\\\ \\hline', '', stargazer.table[[D]], fixed=T)}  # OBS: change bw, change here
  if(D>1){stargazer.table[[D]] <- sub('.*(\\& \\\\multicolumn\\{3.*)', '\\1', stargazer.table[[D]])} # OBS: change bw, change here
  stargazer.table[[D]] <- sub('$\\bar{y}_0$', '\\hline $\\bar{y}_0$', stargazer.table[[D]], fixed=T)
  if(D>1){stargazer.table[[D]] <- paste('\\\\\\\\[-3ex]', sub('(.*}).*(\\$SMD)', '\\1 \\\\\\\\[1ex] \\2', stargazer.table[[D]]), sep='')}
  if(D!=length(datasets)){stargazer.table[[D]] <- sub('(.*)\\\\multicolumn\\{4.*', '\\1', stargazer.table[[D]])}} # OBS: change bw, change here

write(paste(stargazer.table, sep='', collapse = ''), file='tables/TA12.tex')





















